import collections
import math


def bleu_weighted(pred_seq, label_seq, k):  # 1/2^n
    """Score a prediction against a reference with 1/2^n-weighted precisions.

    The result is the brevity penalty ``exp(min(0, 1 - len_label / len_pred))``
    multiplied by ``p_n ** (0.5 ** n)`` for every order ``n`` in ``1..k``,
    where ``p_n`` is the clipped n-gram precision of the prediction.

    Args:
        pred_seq: Predicted sentence; tokens are separated by single spaces.
        label_seq: Reference sentence, tokenized the same way.
        k: Largest n-gram order to include.

    Returns:
        The score as a float. ``0.0`` is returned when the prediction is too
        short to contain any n-gram of some requested order.
    """
    pred_tokens, label_tokens = pred_seq.split(' '), label_seq.split(' ')
    len_pred, len_label = len(pred_tokens), len(label_tokens)
    # Brevity penalty: only predictions shorter than the reference are punished.
    score = math.exp(min(0, 1 - len_label / len_pred))
    for n in range(1, k + 1):
        num_pred_ngrams = len_pred - n + 1
        if num_pred_ngrams <= 0:
            # No n-grams of this order exist in the prediction; treating the
            # precision as zero avoids dividing by zero.
            return 0.0
        label_subs = collections.Counter(
            ' '.join(label_tokens[i: i + n])
            for i in range(len_label - n + 1)
        )
        num_matches = 0
        for i in range(num_pred_ngrams):
            ngram = ' '.join(pred_tokens[i: i + n])
            if label_subs[ngram] > 0:
                num_matches += 1
                # Clip: each reference n-gram can be matched only once.
                label_subs[ngram] -= 1
        score *= math.pow(num_matches / num_pred_ngrams, math.pow(0.5, n))
    return score


def bleu(pred_seq, label_seq, k):  # 1/n
    """Score a prediction against a reference with 1/n-weighted precisions.

    The result is the brevity penalty ``exp(min(0, 1 - len_label / len_pred))``
    multiplied by ``p_n ** (1 / n)`` for every order ``n`` in ``1..k``, where
    ``p_n`` is the clipped n-gram precision of the prediction.

    Args:
        pred_seq: Predicted sentence; tokens are separated by single spaces.
        label_seq: Reference sentence, tokenized the same way.
        k: Largest n-gram order to include.

    Returns:
        The score as a float. ``0.0`` is returned when the prediction is too
        short to contain any n-gram of some requested order.
    """
    pred_tokens, label_tokens = pred_seq.split(' '), label_seq.split(' ')
    len_pred, len_label = len(pred_tokens), len(label_tokens)
    # Brevity penalty: only predictions shorter than the reference are punished.
    score = math.exp(min(0, 1 - len_label / len_pred))
    for n in range(1, k + 1):
        num_pred_ngrams = len_pred - n + 1
        if num_pred_ngrams <= 0:
            # No n-grams of this order exist in the prediction; treating the
            # precision as zero avoids dividing by zero.
            return 0.0
        label_subs = collections.Counter(
            ' '.join(label_tokens[i: i + n])
            for i in range(len_label - n + 1)
        )
        num_matches = 0
        for i in range(num_pred_ngrams):
            ngram = ' '.join(pred_tokens[i: i + n])
            if label_subs[ngram] > 0:
                num_matches += 1
                # Clip: each reference n-gram can be matched only once.
                label_subs[ngram] -= 1
        score *= math.pow(num_matches / num_pred_ngrams, 1 / n)
    return score


def BLEU1_4(pred_seqs, label_seqs):
    """Average the ``bleu`` score for k = 1, 2, 3, 4 over sentence pairs.

    Args:
        pred_seqs: Iterable of predicted sentences.
        label_seqs: Iterable of reference sentences, paired with ``pred_seqs``
            by position. If the two differ in length, only the pairs produced
            by ``zip`` are scored.

    Returns:
        A list of four floats: the mean ``bleu`` score for k = 1..4 over the
        pairs that were scored. All four are ``0.0`` when no pair is scored.
    """
    totals = [0.0, 0.0, 0.0, 0.0]
    num_pairs = 0
    for pred_seq, label_seq in zip(pred_seqs, label_seqs):
        num_pairs += 1
        for order in range(1, 5):
            totals[order - 1] += bleu(pred_seq, label_seq, order)
    if num_pairs == 0:
        # Nothing to average; avoid dividing by zero on an empty corpus.
        return totals
    # Divide by the number of pairs actually scored so a length mismatch
    # between the two inputs cannot skew the mean.
    return [total / num_pairs for total in totals]



if __name__ == "__main__":
    # Demo: score one prediction against its reference and print the four
    # averaged scores returned by BLEU1_4.
    hypotheses = ["i come from china , and you ?"]
    references = ["i come from china , how about you ?"]
    scores = BLEU1_4(hypotheses, references)
    print(scores)